In [1]:
# Execute the shared configuration notebook before anything else.
# NOTE(review): df_raw, np and pd are used below but never defined in this
# notebook; presumably they are provided by this config notebook - confirm.
%run "../0. config.ipynb"


2008

data preparation


In [2]:
# Distinct event types found in the raw logs, in order of first appearance.
list_types = list(df_raw["type"].unique())

Count the number of events of each type per player (~ per game)


In [3]:
def compute_players_type_events(logs):
  """Count how many events of each type every player generated.

  Parameters
  ----------
  logs : pandas.DataFrame
    Event log; only the "playerId" and "type" columns are read.

  Returns
  -------
  pandas.DataFrame
    One row per playerId (the index) and one column per event type (the
    columns index is named "type"). Player/type pairs that never occur
    in the log are reported as 0.
  """
  # size() counts the rows of each (playerId, type) pair directly. It
  # replaces the helper "count" column aggregated with np.sum: handing
  # NumPy callables to agg() is deprecated in recent pandas releases.
  counts = logs.groupby(["playerId", "type"]).size()

  # Pivot the event types into columns. Pairs that never occur show up as
  # NaN (which makes the columns float) before being replaced by 0.
  return counts.unstack().fillna(0)

# Preview the first rows of the per-player event counts.
compute_players_type_events(df_raw).head()


Out[3]:
type complete configure craft death equip gotomooc gotourl pickup reach restart selectmenu start switch unequip
playerId
0023dbb1-7f98-4cdb-8122-722f801f40b3 0.0 1.0 0.0 3.0 0.0 0.0 0.0 1.0 2.0 0.0 1.0 0.0 0.0 0.0
01b0c435-f0c0-4bfd-9189-86fc0d29b163 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0
02bc076b-32aa-467a-bbc6-b746abedb7bd 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0
02c6953a-0417-4858-8efb-1989be9f6b9d 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 2.0 1.0 1.0 0.0
0306db66-081d-4035-b30f-8358469d6ec3 1.0 0.0 3.0 17.0 8.0 0.0 1.0 5.0 12.0 1.0 2.0 0.0 0.0 2.0

compute game durations


In [4]:
def compute_game_durations(logs):
  """Compute, per player, the time elapsed between first and last event.

  Parameters
  ----------
  logs : pandas.DataFrame
    Event log; only the "playerId" and "serverTime" columns are read.
    "serverTime" must be parseable by pandas.to_datetime.

  Returns
  -------
  pandas.DataFrame
    Indexed by playerId, with a single float column
    "duration (seconds)" holding the duration in whole seconds
    (the sub-second part is dropped).
  """
  # String aggregator names give stable "min" / "max" column labels.
  # With np.min / np.max the labels are derived from the functions'
  # __name__ and can be "amin"/"amax" or "min"/"max" depending on the
  # installed NumPy / pandas versions, which breaks hard-coded lookups.
  bounds = logs.groupby("playerId")["serverTime"].agg(["min", "max"])

  # NOTE(review): as before, min/max are taken on the raw column and only
  # then parsed; if serverTime holds strings this relies on their sort
  # order being chronological - confirm against the log format.
  elapsed = pd.to_datetime(bounds["max"]) - pd.to_datetime(bounds["min"])

  # total_seconds() does not depend on the unit the timedeltas are stored
  # in, unlike casting to int and dividing by 1e9. Durations are never
  # negative (max >= min), so floor() simply truncates to whole seconds.
  seconds = np.floor(elapsed.dt.total_seconds())

  return seconds.to_frame("duration (seconds)")

# Preview the first rows of the per-player durations.
compute_game_durations(df_raw).head()


Out[4]:
duration (seconds)
playerId
0023dbb1-7f98-4cdb-8122-722f801f40b3 175.0
01b0c435-f0c0-4bfd-9189-86fc0d29b163 0.0
02bc076b-32aa-467a-bbc6-b746abedb7bd 0.0
02c6953a-0417-4858-8efb-1989be9f6b9d 19.0
0306db66-081d-4035-b30f-8358469d6ec3 1030.0

compute max checkpoint reached by players


In [5]:
def max_reach(x):
  """Return the highest checkpoint number found among section values.

  Parameters
  ----------
  x : iterable
    Section values of one player. Float entries (missing values are
    float NaN) are skipped; for every other entry the last two
    characters are parsed as an integer.

  Returns
  -------
  int
    The largest parsed number, or 0 when there is no usable entry.
  """
  # The isinstance test is what filters out NaN: a comparison such as
  # `value != np.nan` is true for every value, NaN included, so it never
  # filtered anything and has been dropped.
  checkpoints = [int(section[-2:]) for section in x if not isinstance(section, float)]

  if checkpoints:
    return max(checkpoints)
  return 0

def adventure(x):
  """Tell whether any section value belongs to "adventure1".

  Float entries (missing values are float NaN) are skipped. For the other
  entries, the text before the first "." must equal "adventure1".
  Returns a bool.
  """
  # `value != np.nan` is always true, so only the isinstance test is kept
  # to skip missing values; any() avoids building a throwaway list.
  return any(
    section.split(".")[0] == "adventure1"
    for section in x
    if not isinstance(section, float)
  )

def sandbox(x):
  """Tell whether any section value belongs to "sandbox1" or "sandbox2".

  Float entries (missing values are float NaN) are skipped. For the other
  entries, the text before the first "." is compared with the two sandbox
  names. Returns a bool.
  """
  sandbox_names = ("sandbox1", "sandbox2")

  # `value != np.nan` is always true, so only the isinstance test is kept
  # to skip missing values.
  return any(
    section.split(".")[0] in sandbox_names
    for section in x
    if not isinstance(section, float)
  )

def compute_max_reachpoint(logs):
  """Summarise, per player, the sections found in the logs.

  Reads the "playerId", "type" and "section" columns of `logs` and returns
  a DataFrame indexed by playerId with the columns "max_reach",
  "adventure" and "sandbox", each produced by the function of that name
  applied to the player's "section" values.
  """
  section_logs = logs.loc[:, ["playerId", "type", "section"]]
  per_player = section_logs.groupby("playerId")

  summary = per_player.agg({ "section": [ max_reach, adventure, sandbox ] })

  # agg() yields two-level columns ("section", <function name>); only the
  # function-name level is kept.
  summary.columns = summary.columns.droplevel()

  return summary

# Preview the first rows of the per-player section summary.
compute_max_reachpoint(df_raw).head()


Out[5]:
max_reach adventure sandbox
playerId
0023dbb1-7f98-4cdb-8122-722f801f40b3 2 True False
01b0c435-f0c0-4bfd-9189-86fc0d29b163 0 False False
02bc076b-32aa-467a-bbc6-b746abedb7bd 0 False False
02c6953a-0417-4858-8efb-1989be9f6b9d 1 False True
0306db66-081d-4035-b30f-8358469d6ec3 8 True False

compile sessionId


In [6]:
def compile_sessionid(raw):
  """Attach one session id to every player.

  Parameters
  ----------
  raw : pandas.DataFrame
    Event log; only the "playerId" and "customData.localplayerguid"
    columns are read.

  Returns
  -------
  pandas.DataFrame
    Indexed by playerId, with a single "sessionId" column holding the
    player's first non-missing "customData.localplayerguid" value with
    double quotes removed, or None when the player has no such value.
  """
  def sessionid(values):
    # Return the first usable guid in log order. Picking an element out
    # of a set made the result arbitrary whenever a player had several
    # distinct guids; taking the first one is deterministic.
    for value in values:
      # Missing values are float NaN; `value != np.nan` is always true,
      # so the type test is the only effective filter.
      if isinstance(value, float):
        continue
      return value.replace("\"", "")
    return None

  guids = raw.loc[:, ["playerId", "customData.localplayerguid"]]
  per_player = guids.groupby("playerId").agg({ "customData.localplayerguid" : sessionid })

  return per_player.rename(columns={ "customData.localplayerguid": "sessionId" })

# Preview only: the session ids are compiled from the first 100 log rows.
compile_sessionid(df_raw[0:100]).head()


Out[6]:
sessionId
playerId
3607b3ec-9e2a-4043-9f07-8dbccee66cb3 None
4acee9a7-7295-44c6-85ec-b247a3b483b1 4a9fded1-7ab7-48f5-b267-2fac6627ea5a
5af5f837-2aa1-4d5e-86a3-05d39d6cf63d 4a9fded1-7ab7-48f5-b267-2fac6627ea5a
85de8fbc-b510-47c6-bfcd-ce829712c379 8c83d8d2-f63a-49b4-b0da-2712080fc4d1
9c183209-8867-46bb-9d29-9dda57e5a7bd 6e665720-b387-423f-8dab-1efade2f0c63

aggregate all data into one data frame


In [7]:
# Start from the event counts and attach the other per-player tables one
# after the other. Every merge aligns on the playerId index and uses the
# default (inner) join, so only players present on both sides are kept.
players_stats = (
  compute_players_type_events(df_raw)
  .merge(compute_game_durations(df_raw), left_index=True, right_index=True)
  .merge(compute_max_reachpoint(df_raw), left_index=True, right_index=True)
  .merge(compile_sessionid(df_raw), left_index=True, right_index=True)
)

In [8]:
# Number of players flagged for neither adventure nor sandbox.
len(players_stats[ (players_stats["adventure"] == False) & (players_stats["sandbox"] == False) ])


Out[8]:
62

In [9]:
# Number of players flagged for both adventure and sandbox.
len(players_stats[ (players_stats["adventure"] == True) & (players_stats["sandbox"] == True) ])


Out[9]:
15

In [10]:
# Number of players flagged for sandbox, whatever their adventure flag.
len(players_stats[ (players_stats["sandbox"] == True) ])


Out[10]:
20

preview


In [11]:
# Preview the first rows of the merged per-player table.
players_stats.head()


Out[11]:
type complete configure craft death equip gotomooc gotourl pickup reach restart selectmenu start switch unequip duration (seconds) max_reach adventure sandbox sessionId
playerId
0023dbb1-7f98-4cdb-8122-722f801f40b3 0.0 1.0 0.0 3.0 0.0 0.0 0.0 1.0 2.0 0.0 1.0 0.0 0.0 0.0 175.0 2 True False None
01b0c435-f0c0-4bfd-9189-86fc0d29b163 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0 False False e3e1604c-b94b-4669-abc9-01ae57e9b691
02bc076b-32aa-467a-bbc6-b746abedb7bd 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0 False False 32506a02-7174-46f2-89e2-8ffd1089334b
02c6953a-0417-4858-8efb-1989be9f6b9d 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 2.0 1.0 1.0 0.0 19.0 1 False True 3e4c5d28-c240-4024-b024-9f6c37375b77
0306db66-081d-4035-b30f-8358469d6ec3 1.0 0.0 3.0 17.0 8.0 0.0 1.0 5.0 12.0 1.0 2.0 0.0 0.0 2.0 1030.0 8 True False None

In [13]:
# Write the merged table to players_stats.csv (relative path, UTF-8).
players_stats.to_csv("players_stats.csv", encoding="utf-8")